/*------------------------------------------------------------------------------------------------------------------------------*/
/*       Project name: Validation                                                                                               */
/*          Objective: 1. To validate the risk factor pattern of cognition in continuous NHANES                                 */
/*            Dataset: NHANES 2011-2014 adults>=20                                                                              */
/*         Programmer: Longgang Zhao (lz7@email.sc.edu)                                                                         */
/*               Date: 7/15/2022 -- 8/15/2022                                                                                   */
/*           Exposure: Risk factors of cognition                                                                                */
/*            Outcome: cognition                                                                                                */
/*         Covariates: see below                                                                                                */
/*               Note:                                                                                                          */
/*------------------------------------------------------------------------------------------------------------------------------*/
/*Validation study using NHANES1999-2018*/

/*Scoring details based on model with AIC=2112.6
Age ---------------------------   0.06
Waist circumference -----------  -0.02 
Social connect with phone -----  -0.02
Social connect with church ----  -0.003
Race with others --------------   0.77
Race with black ---------------   0.46
Education ---------------------  -0.74
Income high -------------------  -0.89
Income middle -----------------  -0.34
Physical activity moderately --  -0.15
Physical activity vigorously --   0.22
Diabetes ----------------------   0.28
Hypercholesterolemia ----------  -0.52
Annual visit of dentists ------  -0.54
************************************************/

/*Macros and notes*/
/*Download one NHANES transport (XPT) file from the CDC web site into WORK, sort it by SEQN
  and print a summary of every numeric variable.
  dataset: NHANES file name, e.g. DEMO_G. A trailing _B ... _K suffix selects the survey cycle
           (B=2001-2002, each following letter two years later, K=2019-2020); any other name
           is read from the 1999-2000 folder.
  NOTE(review): confirm that the URL pattern below still matches the current CDC download
  paths before relying on this macro.*/
%macro NHANES(dataset);
/*Keep the helper variables local so they cannot overwrite same-named variables outside the macro*/
%local cycle varlist i var year;
%let cycle=1999-2000; /*default when no cycle suffix is present*/
%let varlist=B C D E F G H I J K;
%if %length(&dataset)>1 %then %do;
 %do i = 1 %to 10;
  %let var=%scan(&varlist,&i);
  %let year=%eval(&i*2+1999)-%eval(&i*2+2000);
  /*Compare the last two characters with the underscore plus letter, so that a name which
    merely ends in one of the letters B-K is not mistaken for a cycle suffix*/
  %if %qsubstr(&dataset,%length(&dataset)-1)=_&var %then %let cycle=&year;
 %end;
%end;
filename xptIn url "https://wwwn.cdc.gov/nchs/nhanes/&cycle./&dataset..xpt"; 
libname xptIn xport;
proc copy in=xptIN out=work; run;
proc sort data=&dataset; by SEQN; run;
proc means n nmiss min max mean std data=&dataset; var _numeric_; run;
%mend NHANES;

/*Draw one PROC SGPLOT histogram per variable.
  datain:  dataset to plot from
  varlist: list of variable names, split with the default SCAN delimiters*/
%macro histogram (datain,varlist);
/*Loop helpers are local so that they cannot overwrite same-named variables outside the macro*/
%local varnumber var;
%let varnumber=1;
%do %while (%scan(&varlist.,&varnumber.) ne );
 %let var=%scan(&varlist.,&varnumber.);
 title "Distribution of &var";
 proc sgplot data=&datain; histogram &var; run;
 %let varnumber=%eval(&varnumber.+1);
%end;
title; /*clear the title so it does not carry over to later output*/
%mend;

/*Frequency counts only (row, column and overall percentages suppressed).
  dat: input dataset
  cat: TABLES request, e.g. a list of categorical variables*/
%macro cat(dat,cat);
proc freq data=&dat;
 tables &cat / norow nocol nopercent;
run;
%mend cat;
/*Distribution summary (N, missing count, extremes and selected percentiles).
  dat: input dataset
  con: list of continuous variables*/
%macro con(dat,con);
proc means data=&dat n nmiss min p1 p25 p50 p75 p99 max;
 var &con;
run;
%mend con;

/*Library references. They are assigned first so that PROC FORMAT below has an existing
  library in which to store its catalog.*/
libname datloc "C:\Users\lz7\OneDrive - University of South Carolina\AD risk factors\Summer GA 2022\Data";
libname cova   "C:\Users\lz7\OneDrive - University of South Carolina\Finished projects\EPID 788 2021\NHANES project\Dataset";

/*Value labels for the derived categorical variables, stored in the DATLOC format catalog.
  DATLOC is the only data library this program assigns for its own output, so both the
  catalog location and the FMTSEARCH path below point to it.
  NOTE(review): if a libref named DATALOC is assigned outside this file (for example in an
  autoexec), confirm which catalog is intended.
  NOTE(review): the rpagp labels (1=Vigorously active ... 3=Sedentary) run opposite to the
  PAQ data step, which codes rpa=0 as 1 and the highest scores as 3 - confirm which is right.
  NOTE(review): the label OVERWEIHGT is misspelt; it is left unchanged here because the same
  label is repeated in the PROC FORMAT near the end of the program.*/
proc format library=datloc;
    value Age2gp 0='<60' 1='60-<70' 2='>=70';
    value racegp 1='white' 2='black' 3='OTHER';
    value sex 1='Male' 2='Female';
    value edugp 1='1_<12 yrs' 2='>=12 yrs';
    value incomegp  1='LOW_PIR=<1.3' 2='MIDDLE_PIR=<3.5' 3='HIGH_PIR>3.5';
    value smkgp 1='NonSmoker' 2='Ever Smoker' 3='Current Smoker';
    value drinkgp 0='NonDrinker' 1='Drinker';
    value bmigp 1='NORMAL' 2='OVERWEIHGT' 3='OBESITY';
    value rpagp 1='Vigorously active' 2='Moderately active' 3='Sedentary'; 
    value yesno 1='YES' 0='NO';
    value yvisit 1='YES' 2='NO';
    value perios 1='Moderate or severe' 0='No disease' ;
run;

options fmtsearch=(cova); /*cova.all07_18_updated09282021*/
%include "C:\Users\lz7\OneDrive - University of South Carolina\Tools\SASMacro\Macro_describe_perct.sas";/*Table 1 macro*/
/*From here on, look up user formats in the catalog written above*/
options fmtsearch=(datloc);

/*************************************************************************************************************************/
/*                                              Part I : Variables readin                                                */
/*************************************************************************************************************************/

/*Age Agegp racegp sex edugp incomegp smkgp drinkgp bmigp rpagp 
  bmi WC HEI SBP DBP WBC GBP CRP TCP LCP HDP TGP LPP cognition
  scl_phone scl_together scl_neighb scl_church scl_meeting scl_clubs
  diabetes hsCVD HP Cancer hsChol drug_hc anxiety depression 
  yvisit perios*/

/*scl_phone scl_together scl_neighb scl_church scl_meeting scl_clubs*/

/*Cognition*/
/*Download the cognitive functioning files for the G and H cycles.*/
%NHANES(CFQ_G);
%NHANES(CFQ_H);

/*Stack both cycles and copy the four test scores to short analysis names.*/
data CFQ;
 set CFQ_G CFQ_H;
 /*NOTE(review): in the NHANES CFQ codebook CFDCIR appears to be the CERAD intrusion word
   count, not an immediate-recall score; the pooled mean of about 0.29 fits a count of
   intrusions. Confirm that this is the intended variable.*/
 IRT  = CFDCIR;
 DRT  = CFDCSR;
 AFT  = CFDAST;
 DSST = CFDDS;
run;

/*Pooled mean and SD of each test; the printed values are hard-coded in the z-score step that follows.*/
proc means data=CFQ mean std;
 var IRT DRT AFT DSST;
run;
/*
                               Variable            Mean         Std Dev
                               ----------------------------------------
                               IRT            0.2914267       0.7005164
                               DRT            5.7738324       2.4109815
                               AFT           16.3205788       5.5519241
                               DSST          45.4611812      17.4391832
                               ----------------------------------------
*/
/*Standardise each test with the pooled mean and SD listed above, add the four z scores into
  a total, and flag the lowest totals.*/
data CFQ;
 set CFQ;
 z_IRT  = (IRT  -  0.2914267) /  0.7005164;
 z_DRT  = (DRT  -  5.7738324) /  2.4109815;
 z_AFT  = (AFT  - 16.3205788) /  5.5519241;
 z_DSST = (DSST - 45.4611812) / 17.4391832;
 /*Z score reference: PMID: 29654035.
   SUM() skips missing components, so the total is missing only when all four z scores are
   missing. NOTE(review): confirm that totals built from fewer than four tests are intended.*/
 cognition = sum(z_IRT, z_DRT, z_AFT, z_DSST);
 select;
  when (cognition = .)           cogn_cat = .;
  when (cognition <= -3.0793407) cogn_cat = 1; /*bottom 10%*/
  otherwise                      cogn_cat = 0;
 end;
 format cogn_cat yesno.;
run;

/*Distribution of the total score and size of the flagged group.*/
proc univariate data=CFQ;
 var cognition;
 histogram;
run;
proc freq data=CFQ;
 table cogn_cat;
run;

/*===============Demo related variables coding======================*/
/*Stack the G and H demographic files and derive the age, sex, race, education and income groups.*/
%NHANES(DEMO_G); %NHANES(DEMO_H);
data DEMO; set DEMO_G DEMO_H; 
/*Age*/
Age=RIDAGEYR;
 /*A missing value compares lower than any number in SAS, so it is handled first;
   otherwise a missing age would be placed in the <60 group*/
 if missing(Age)  then Agegp=.;
  else if Age<60  then Agegp=0;
   else if Age<70 then Agegp=1; 
    else               Agegp=2; 
/*Sex*/  
sex=RIAGENDR;
/*Race*/
/*3 -> white, 4 -> black, every other value (including missing) -> other*/
if RIDRETH1 =3     then racegp=1;
 else if RIDRETH1=4 then racegp=2;
  else                    racegp=3;
/*Education*/
education=DMDEDUC2;
/*7 and 9 are treated as missing; 1 and 2 form the <12 yrs group*/
if education in (.,7,9)    then edugp=.;
 else if education in (1,2) then edugp=1;
  else                            edugp=2;
/*Income: PIR*/
/*Missing PIR is tested first: because missing compares lower than any number, it would
  otherwise satisfy INDFMPIR<=3.5 and be coded as middle income*/
if missing(INDFMPIR)        then incomegp=.;
 else if 0<=INDFMPIR<=1.3    then incomegp=1; 
  else if INDFMPIR<=3.5      then incomegp=2; 
   else                           incomegp=3; 
format Agegp Age2gp. racegp racegp. sex sex. edugp edugp. incomegp incomegp.;
run;
proc freq data=DEMO; tables Agegp racegp edugp incomegp; run;

/*===============Smoking coding======================*/
/*Stack the G and H smoking files and derive a three-level smoking status.*/
%NHANES(SMQ_G); %NHANES(SMQ_H);
data SMQ; set SMQ_G SMQ_H; 
/*NOTE(review): missing, 7 and 9 on SMQ020 are grouped with the non-smokers, so smkgp is
  never missing for records in this file - confirm that this is intended.*/
if SMQ020 in (.,2,7,9) then smkgp=1;
 /*Time since quitting identifies former smokers. 66666 is the NHANES code for having quit
   50 or more years ago; it lies outside 0-10000 and would otherwise fall through to the
   current-smoker group.
   NOTE(review): 77777/99999 on SMQ050Q still fall through to smkgp=3 - confirm.*/
 else if 0<SMQ050Q<10000 or SMQ050Q=66666 then smkgp=2;
  else                         smkgp=3;
format smkgp smkgp.;
run;
proc freq data=SMQ; tables smkgp; run;

/*===============Drinking coding======================*/
/*Stack the G and H alcohol files and derive the drinker indicator from ALQ101.*/
%NHANES(ALQ_G);
%NHANES(ALQ_H);
data ALQ;
 set ALQ_G ALQ_H;
 select (ALQ101);
  when (2)  drinkgp = 0;
  when (1)  drinkgp = 1;
  otherwise drinkgp = .; /*any other answer, including missing*/
 end;
 format drinkgp drinkgp.;
run;
proc freq data=ALQ;
 tables drinkgp;
run;

/*===============Body measurement variables coding======================*/
/*Stack the G and H body measurement files; keep BMI, a three-level BMI group and waist circumference.*/
%NHANES(BMX_G);
%NHANES(BMX_H);
data BMX;
 set BMX_G BMX_H;
 /*BMI*/
 bmi = BMXBMI;
 /*Branches are tested in order, so each cut-point only needs its upper bound*/
 select;
  when (BMXBMI = .)  bmigp = .;
  when (BMXBMI < 25) bmigp = 1;
  when (BMXBMI < 30) bmigp = 2;
  otherwise          bmigp = 3;
 end;
 /*Waist circumference*/
 WC = BMXWAIST;
 format bmigp bmigp.;
run;
proc freq data=BMX;
 tables bmigp;
run;
proc means data=BMX n nmiss min max median;
 var bmi WC;
run;

/*===============Physical activity coding======================*/
/*Stack the G and H physical activity files, build a weighted activity score (rpa) from three
  pairs of day and duration variables, and split it into three groups.*/
%NHANES(PAQ_G); %NHANES(PAQ_H);
data PAQ; set PAQ_G PAQ_H; 
array paday  {*} PAQ640 PAQ655 PAQ670;
array palevel{*} PAD645 PAD660 PAD675;
 /*Missing and the 77/99 (7777/9999) codes are overwritten with 0, so they contribute
   nothing to the score*/
 do i=1 to dim(paday);
 if paday{i}   in (.,77,99)     then paday{i}=0;
 if palevel{i} in (.,7777,9999) then palevel{i}=0;
 end; drop i;
/*Weights: 4 for the first and third pair, 8 for the second pair*/
rpa=(paq640*pad645*4)+(paq655*pad660*8)+(paq670*pad675*4);
/*After the recoding above rpa is never missing, so the first branch is only a safeguard*/
if rpa=. then rpagp=.;
 else if rpa = 0 then rpagp=1;
  else if rpa < 1280 then rpagp=2; /*cut-point: median in rpa>0*/
   else                    rpagp=3;
/*No format is attached here. The rpagp. format is a category format and must not be put on
  the continuous score rpa.
  NOTE(review): it is not attached to rpagp either, because the rpagp value labels defined in
  this program (1=Vigorously active, 3=Sedentary) run opposite to the coding above (rpa=0
  gives 1, the highest scores give 3). Reconcile the labels and the coding before attaching it.*/
run;
proc freq data=PAQ; tables rpagp; run;
proc means data=PAQ n nmiss min max median; var rpa; run;

/*===============Dietary quality coding======================*/
/*Read the stored HEI-2015 dataset and copy the total score to the short name HEI.*/
data HEI;
 set datloc.hei2015_07_18;
 HEI = HEI2015_TOTAL_SCORE;
run;
proc means data=HEI n nmiss min max median;
 var HEI;
run;

/*===============Anxiety and depression coding======================*/
/*Anxiety*/
%NHANES(HSQ_G); /*only available for 2011-2012*/
data HSQ;
 set HSQ_G;
 /*Missing, 77 and 99 stay missing; otherwise the indicator is 1 when HSQ496 is 14 or more*/
 if HSQ496 in (., 77, 99) then anxiety = .;
 else anxiety = (HSQ496 >= 14);
 format anxiety yesno.;
run;
proc freq data=HSQ;
 tables anxiety;
run;

/*Depression*/
/*Stack the G and H depression screener files, add the nine items into a score and flag
  scores of 10 or more.*/
%NHANES(DPQ_G); %NHANES(DPQ_H);
data DPQ; set DPQ_G DPQ_H; 
array DPQ {*} DPQ010 DPQ020 DPQ030 DPQ040 DPQ050 DPQ060 DPQ070 DPQ080 DPQ090;
/*Set the 7 and 9 codes to missing so that they are not added into the score*/
do i=1 to dim(DPQ);
 if DPQ{i} in (.,7,9) then DPQ{i}=.;
end;
drop i; /*loop index only; dropped like in the other data steps so it does not reach the merge*/
/*SUM() skips missing items, so the score is missing only when all nine items are missing.
  NOTE(review): confirm that scores built from partly answered screeners are intended.*/
depressionscore = sum (DPQ010, DPQ020, DPQ030, DPQ040, DPQ050, DPQ060, DPQ070, DPQ080, DPQ090);
if depressionscore=.        then depression=.;
 else if depressionscore>=10 then depression=1;
  else                             depression=0; /*cut-point reference: PMID: 11556941*/
format depression yesno.;
run;
proc freq data=DPQ; tables depression; run;

/*=================Disease history======================*/
/*diabetes*/
%NHANES(DIQ_G);
%NHANES(DIQ_H);
data DIQ;
 set DIQ_G DIQ_H;
 /*1 when DIQ010 equals 1; every other value, including missing, gives 0.
   NOTE(review): confirm that non-response should count as no diabetes.*/
 hsDM = (DIQ010 = 1);
 format hsDM yesno.;
run;
proc freq data=DIQ;
 tables hsDM;
run;

/*Medical conditions: CVD/stroke and cancer history.*/
%NHANES(MCQ_G);
%NHANES(MCQ_H);
data MCQ;
 set MCQ_G MCQ_H;
 /*CVD/stroke: 1 when any of the five items equals 1, otherwise 0 (also when all are missing)*/
 hsCVD = (MCQ160b = 1 or MCQ160c = 1 or MCQ160d = 1 or MCQ160e = 1 or MCQ160f = 1);
 /*cancer: 1 when MCQ220 equals 1, otherwise 0*/
 hsCancer = (MCQ220 = 1);
 format hsCVD yesno. hsCancer yesno.;
run;
proc freq data=MCQ;
 tables hsCVD hsCancer;
run;

/*Blood pressure and cholesterol questionnaire: hypertension and hypercholesterolemia history.*/
%NHANES(BPQ_G);
%NHANES(BPQ_H);
data BPQ;
 set BPQ_G BPQ_H;
 /*hypertension: 1 when BPQ020 equals 1, otherwise 0 (including missing)*/
 hsHP = (BPQ020 = 1);
 /*hypercholesterolemia: 1 when BPQ080 equals 1, otherwise 0 (including missing)*/
 hsChol = (BPQ080 = 1);
 format hsHP yesno. hsChol yesno.;
run;
proc freq data=BPQ;
 tables hsHP hsChol;
run;

/*=================Biomarker coding======================*/
/*Each step stacks the G and H files of one examination or laboratory component and copies the
  measurement to a short analysis name.*/

/*SBP DBP*/
%NHANES(BPX_G);
%NHANES(BPX_H);
data BPX;
 set BPX_G BPX_H;
 /*Average of readings 1 to 3; MEAN() uses whichever of them are not missing*/
 SBP = mean(of BPXSY1-BPXSY3);
 DBP = mean(of BPXDI1-BPXDI3);
run;
proc means data=BPX n nmiss min max median;
 var SBP DBP;
run;

/*WBC*/
%NHANES(CBC_G);
%NHANES(CBC_H);
data CBC;
 set CBC_G CBC_H;
 WBC = LBXWBCSI;
run;
proc means data=CBC n nmiss min max median;
 var WBC;
run;

/*fibrinogen-GBP--not available after 2003*/
/*CRP--not available between 2011-2014*/

/*total cholesterol--TCP;*/
%NHANES(TCHOL_G);
%NHANES(TCHOL_H);
data TCHOL;
 set TCHOL_G TCHOL_H;
 TCP = LBXTC;
run;
proc means data=TCHOL n nmiss min max median;
 var TCP;
run;

/*LDL cholesterol---LCP+triglycerides---TGP;*/
%NHANES(TRIGLY_G);
%NHANES(TRIGLY_H);
data TRIGLY;
 set TRIGLY_G TRIGLY_H;
 LCP = LBDLDL;
 TGP = LBXTR;
run;
proc means data=TRIGLY n nmiss min max median;
 var LCP TGP;
run;

/*HDL cholesterol--HDP;*/
%NHANES(HDL_G);
%NHANES(HDL_H);
data HDL;
 set HDL_G HDL_H;
 HDP = LBDHDD;
run;
proc means data=HDL n nmiss min max median;
 var HDP;
run;
/*=================Dental coding======================*/
/*Stack the G and H oral health files and derive the dental visit and periodontal indicators.*/
%NHANES(OHQ_G);
%NHANES(OHQ_H);
data OHQ;
 set OHQ_G OHQ_H;
 /*Annual visits to dentist: answers 1-2 -> 1 (yes), 3-7 -> 2 (no), anything else -> missing*/
 select (OHQ030);
  when (1, 2)          yvisit = 1;
  when (3, 4, 5, 6, 7) yvisit = 2;
  otherwise            yvisit = .;
 end;
 /*Periodontal disease: answer 1 -> 1, answer 2 -> 0, anything else -> missing*/
 select (OHQ835);
  when (1)  perios = 1;
  when (2)  perios = 0;
  otherwise perios = .;
 end;
 format yvisit yvisit. perios perios.;
run;
proc freq data=OHQ;
 tables yvisit perios;
run;

/*=================check missing======================*/
/*Sort every component dataset by SEQN ahead of the BY-merge that follows.
  dslist: blank-separated list of dataset names, sorted in the order given*/
%macro _sort_by_seqn(dslist);
 %local k ds;
 %do k = 1 %to %sysfunc(countw(&dslist, %str( )));
  %let ds = %scan(&dslist, &k, %str( ));
  proc sort data=&ds; by SEQN; run;
 %end;
%mend _sort_by_seqn;
%_sort_by_seqn(DEMO SMQ ALQ BMX PAQ HEI HSQ DPQ DIQ MCQ BPQ BPX CBC TCHOL TRIGLY HDL OHQ);

/*Merge all components onto the cognition records by SEQN, flag records with any missing
  analysis variable and attach variable labels.*/
data nhanes11_14;
 merge CFQ(in=in_cfq) DEMO SMQ ALQ BMX PAQ HEI HSQ DPQ DIQ MCQ BPQ BPX CBC TCHOL TRIGLY HDL OHQ;
 by SEQN;
 /*Keep only participants who have a cognition record*/
 if in_cfq;

 /*Variables that must all be present for a complete case (anxiety, LCP and TGP are left out)*/
 array covar {*} Agegp racegp sex edugp incomegp smkgp drinkgp bmigp rpagp 
                 cogn_cat hsDM hsCVD hsHP hsCancer hsChol /*anxiety*/ depression 
                 yvisit perios
                 Age bmi WC HEI SBP DBP WBC TCP /*LCP TGP*/ HDP cognition;
 /*ind_missing becomes 1 as soon as one listed variable is missing and then stays 1*/
 ind_missing = 0;
 do i = 1 to dim(covar);
  ind_missing = ind_missing or (covar{i} = .);
 end;
 drop i;

 label
    SEQN='Respondent identification number'
    Agegp='Age group'
    racegp='Race'
    edugp='Education'
    incomegp='Income:poverty ratio'

    smkgp='Smoking status'
    drinkgp='Drinking status'
    bmigp='Body mass index'
    WC='Waist Circumference'
    rpagp='physical activity'
    hsDM='history of diabetes'
    hsCVD='history of CVD or stroke'
    hsHP='history of hypertension'
    hsCancer='history of cancer'
    hsChol='history of hypercholesterol'
    yvisit='Annual visits to dentist'
    perios = 'Periodontal Destruction'

    anxiety='Anxiety'
    depression='Depression'
    cognition='Total z scores for cognition'

    SBP="Systolic Blood Pressure"
    DBP="Diastolic Blood Pressure"
    WBC="WBC"
/*  GBP="Fibrinogen"*/
/*  CRP="CRP"*/
    TCP="Total Cholesterol"
    LCP="LDL Cholesterol"
    HDP="HDL cholesterol"
    TGP="Triglycerides"
/*  LPP="Lipoprotein(a)"*/
    HEI="HEI score"
 ;
run;

/*Frequency tables of the categorical variables in the merged data, with missing values
  shown as a level of their own.*/
proc freq data=nhanes11_14;
 tables Agegp racegp sex edugp incomegp smkgp drinkgp bmigp rpagp
        cogn_cat hsDM hsCVD hsHP hsCancer hsChol anxiety depression
        yvisit perios ind_missing
        / missing;
run;

/*Counts, missing counts and summary statistics for every analysis variable.*/
proc means data=nhanes11_14 n nmiss min median max mean std;
 var Age bmi WC HEI SBP DBP WBC TCP LCP HDP TGP cognition
     Agegp racegp sex edugp incomegp smkgp drinkgp bmigp rpagp
     cogn_cat hsDM hsCVD hsHP hsCancer hsChol anxiety depression
     yvisit perios ind_missing;
run;

/*Store the complete cases (ind_missing=0) with the analysis variables only.*/
data datloc.nhanes11_14
     (keep = SEQN Agegp racegp sex edugp incomegp smkgp drinkgp bmigp rpagp
             cogn_cat hsDM hsCVD hsHP hsCancer hsChol /*anxiety*/ depression
             yvisit perios
             Age bmi WC HEI SBP DBP WBC TCP /*LCP TGP*/ HDP cognition ind_missing);
 set nhanes11_14 (where = (ind_missing = 0));
run;

/*Value labels for the derived categorical variables, written to the default (WORK) catalog.
  NOTE(review): the rpagp labels run opposite to the PAQ data step, which codes rpa=0 as 1
  and the highest scores as 3 - confirm which is right.*/
proc format;
    value Age2gp
        0 = '<60'
        1 = '60-<70'
        2 = '>=70';
    value racegp
        1 = 'white'
        2 = 'black'
        3 = 'OTHER';
    value sex
        1 = 'Male'
        2 = 'Female';
    value edugp
        1 = '1_<12 yrs'
        2 = '>=12 yrs';
    value incomegp
        1 = 'LOW_PIR=<1.3'
        2 = 'MIDDLE_PIR=<3.5'
        3 = 'HIGH_PIR>3.5';
    value smkgp
        1 = 'NonSmoker'
        2 = 'Ever Smoker'
        3 = 'Current Smoker';
    value drinkgp
        0 = 'NonDrinker'
        1 = 'Drinker';
    value bmigp
        1 = 'NORMAL'
        2 = 'OVERWEIHGT'
        3 = 'OBESITY';
    value rpagp
        1 = 'Vigorously active'
        2 = 'Moderately active'
        3 = 'Sedentary';
    value yesno
        1 = 'YES'
        0 = 'NO';
    value yvisit
        1 = 'YES'
        2 = 'NO';
    value perios
        1 = 'Moderate or severe'
        0 = 'No disease';
run;
/*Reduced copy of the stored complete-case data: the listed predictors plus cogn_cat, without
  SEQN, Agegp, bmigp, cognition and ind_missing.*/
data datloc.nhanes11_14svm
     (keep = racegp sex edugp incomegp smkgp drinkgp rpagp
             cogn_cat hsDM hsCVD hsHP hsCancer hsChol depression
             yvisit perios
             Age bmi WC HEI SBP DBP WBC TCP HDP);
 set datloc.nhanes11_14;
run;
